/*****************************************************************************
Description:

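This program merges the relevant data files to create the base file for each
year and stacks them together to create the base file for all years.
	I. 		Merge in enrollment
	II. 	Merge in demographic characteristics
	III. 	Merge in baseline scores
	IV. 	Merge in outcomes

Argument:
	bw		tag that appears in the names of the pscore and running-variable
			input files and of the saved base file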
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

args bw

*	Build the stacked pscore file (application years 2012-2014)

	clear all

	// Pass 1: one temporary file per application year, holding that year's
	// school-level pscores merged 1:1 on stu with the running variables.
	// rv_merge records the result of that merge for each row.
	forvalues app_year = 2012/2014 {
		use "${cleandata}school_level_pscores_`bw'_`app_year'.dta", clear

		*tostring stu, replace
		merge 1:1 stu using "${cleandata}runvar_dataset_program_pscore_`bw'_`app_year'.dta", generate(rv_merge)

		// keys used later to line these rows up with enrollment records
		generate year_app = `app_year'
		generate grade = 9

		tempfile pscores_`app_year'
		save "`pscores_`app_year''"
	}

	// Pass 2: start from an empty dataset and stack the yearly files in
	// chronological order.
	clear
	forvalues app_year = 2012/2014 {
		append using "`pscores_`app_year''"
	}

	tempfile pscores
	save "`pscores'"

*	Enrollment records: load, restrict to high-school grades, attach pscores

	use "${cleandata}demo_all_years_all_grade.dta", clear

	// grade_attempt numbers a student's rows within a grade in year order
	// (1 = first time the student appears in that grade)
	bysort stu grade (year): generate grade_attempt = _n

	// Grade restriction: remove the non-numeric grade codes so that grade can
	// be converted to a number, then retain grades 9 through 12 only
	drop if inlist(grade, "0K", "AD", "IN", "PK")
	destring grade, replace
	keep if inrange(grade, 9, 12)

	// Year restriction flag: 1 for every row of a student who has a first
	// 9th grade entry in 2013-2015, 0 otherwise.
	// NOTE(review): right_cohort is only created here; nothing in this file
	// filters on it -- confirm the restriction is applied downstream.
	bysort stu: egen right_cohort = max(grade == 9  &  inrange(year,2013,2015)  &  grade_attempt == 1)

*	Merge in pscores

	// Merge keys: the application year is the year before the enrollment
	// year, and stu has to be numeric for this merge
	generate year_app = year - 1
	destring stu, replace

	// A row matches only if the student is enrolled in 9th grade in the year
	// right after applying; unmatched enrollment rows are kept, unmatched
	// pscore rows are discarded
	merge m:1 stu year_app grade using "`pscores'", generate(pscore_merge) keep(master match)

***	outcomes

*	SAT Scores
	// stu is still numeric here (it was destrung before the pscore merge).
	// keep(1 3) retains master-only (1) and matched (3) rows, so any value of
	// sat_merge other than 1 means a SAT record was found for the student.
	merge m:1 stu using "${cleandata}sat_nyc_cleaned.dta", keep(1 3) gen(sat_merge)
	gen took_sat = sat_merge != 1

***	controls

* 	demos
	// Convert stu to string once, right before the first merge that follows
	// the conversion; stu then stays string until the final clean-up.
	// (The original issued this tostring twice in a row; the second call was
	// a no-op on an already-string variable.)
	tostring stu, replace
	merge m:1 stu using "${cleandata}demo_info_08th_grade.dta", keep(1 3) gen(demo_merge) keepusing(bl_* enr_*)
	destring swd ell , replace

* 	6th and 8th grade baseline scores
	// The same set of score variables is pulled from each grade's file. After
	// each merge the variables get a grade suffix (_6 or _8) so the second
	// merge does not collide with the first.
	local score_vars raw_score_ela raw_score_math scale_score_ela scale_score_math test_grade_ela test_grade_math bl_ss_math bl_ss_ela
	foreach bl_grade in 6 8 {
		merge m:1 stu using "${cleandata}baseline_scores_standardized_grade`bl_grade'.dta", keep(1 3) gen(base_sc_merge_`bl_grade') keepusing(`score_vars')
		foreach score of varlist `score_vars' {
			rename `score' `score'_`bl_grade'
		}
	}

	// SAT scores only count when the test was taken in high school: blank
	// them out if the SAT year precedes the student's earliest 9th grade year.
	// year_9th_grade is missing for students with no 9th grade row.
	// NOTE(review): took_sat is left unchanged by this step -- confirm that
	// is intended.
	bysort stu (year): egen year_9th_grade = min(cond(grade == 9, year, .))
	foreach subject in total ela math {
		quietly replace sat_`subject' = . if year_sat < year_9th_grade
	}

*	enrollment
	// one 0/1 indicator enr_<value> for every distinct value of sch
	levelsof sch, local(allschools)
	foreach school of local allschools {
		quietly generate enr_`school' = (sch == "`school'")
	}

* 	clean up

	destring stu bl_swd, replace

	//fill in missings (missing values in these variable groups become 0)
	qui mvencode enr_* pscore_* offer_* rv_*  , mv(0) overr

	//drop runvars that are identically zero
	// The check uses min and max rather than the mean: a variable that takes
	// both positive and negative values can average to exactly zero without
	// being constant, and it must not be dropped. r(min) and r(max) are
	// returned by summarize, meanonly; they are missing when there are no
	// nonmissing observations, in which case the variable is kept.
	foreach rv of varlist rv* {
		quietly summarize `rv', meanonly
		if r(min) == 0 & r(max) == 0 {
			drop `rv'
		}
	}

*	rename
*	Final variable names for the base file. The statements below rely on
*	wildcard patterns, so their order and exact patterns matter.

	//test scores
	// 8th grade baseline scores take the bl_ prefix and lose the _8 suffix;
	// the _6 variables are not touched here
	rename (raw_score_ela_8 raw_score_math_8 scale_score_ela_8 scale_score_math_8 test_grade_ela_8 test_grade_math_8) ///
		(bl_raw_ela bl_raw_math bl_scale_ela bl_scale_math bl_test_grade_ela bl_test_grade_math)
	// SAT math and ela lose the sat_ prefix; the standardized 8th grade
	// scores become bl_math and bl_ela
	rename (sat_math sat_ela bl_ss_math_8 bl_ss_ela_8 sat_math_raw sat_ela_raw) ///
		   (math ela bl_math bl_ela math_raw ela_raw)
	// removes variables named sat_total_<something>; a variable named exactly
	// sat_total does not match this pattern and is kept
	drop sat_total_*
	//enrollment
	// NOTE(review): besides the enr_ variables merged from the demo file,
	// this pattern also matches any school indicator enr_<sch> created
	// earlier whose sch value starts with 08 -- confirm that is intended
	rename enr_08* geo_enr_08*
	// applies to every variable whose name ends in swd (swd and bl_swd are
	// both referenced earlier in this file)
	rename *swd *sped
	//pscores
	// md1 is removed from the name; md2 and pnp are replaced by the digits
	// 2 and 3 attached to the preceding part of the name
	rename pscore_*_md1_* pscore_*_*
	rename pscore_*_md2_* pscore_*2_*
	rename pscore_*_pnp_* pscore_*3_*

*	Restrict to 9th grade rows (outcomes and exposures from later years have
*	already been merged onto them)
	keep if grade == 9

	// remove any leftover variables whose names start with two underscores;
	// capture keeps the script running when there are none
	capture drop __*

*	Write the base file
	quietly compress
	save "${cleandata}NYC_basefile_`bw'.dta", replace
